% gusucode.com > 《MATLAB图像与视频处理实用案例详解》代码 > 《MATLAB图像与视频处理实用案例详解》代码/第 19 章 基于语音识别的信号灯图像模拟控制技术/voicebox/readsph.m

    function [y,fs,wrd,phn,ffx]=readsph(filename,mode,nmax,nskip)
%READSPH  Read a SPHERE/TIMIT format sound file [Y,FS,WRD,PHN,FFX]=(FILENAME,MODE,NMAX,NSKIP)
%
% Input Parameters:
%
%   FILENAME gives the name of the file (with optional .SPH extension) or alternatively
%                 can be the FFX output from a previous call to READSPH having the 'f' mode option
%   MODE     specifies the following (*=default):
%
%    Scaling: 's'    Auto scale to make data peak = +-1 (use with caution if reading in chunks)
%             'r'    Raw unscaled data (integer values)
%             'p' *  Scaled to make +-1 equal full scale
%             'o'    Scale to bin centre rather than bin edge (e.g. 127 rather than 127.5 for 8 bit values)
%                     (can be combined with n+p,r,s modes)
%             'n'    Scale to negative peak rather than positive peak (e.g. 128.5 rather than 127.5 for 8 bit values)
%                     (can be combined with o+p,r,s modes)
%   Format    'l'    Little endian data (Intel,DEC) (overrides indication in file)
%             'b'    Big endian data (non Intel/DEC) (overrides indication in file)
%
%   File I/O: 'f'    Do not close file on exit
%             'd'    Look in data directory: voicebox('dir_data')
%             'w'    Also read the annotation file *.wrd if present (as in TIMIT)
%             't'    Also read the phonetic transcription file *.phn if present (as in TIMIT)
%                    Each line of the annotation and transcription files is of the form: m n token
%                    where m and n are start and end times in samples and token is a word or phoneme text descriptor
%                    The corresponding cell arrays WRD and PHN contain two elements per row: {[m n]/fs 'token'}
%                    These outputs are only present if the corresponding 'w' and 't' options are selected
%                    (if only 't' is given, the phoneme list is returned in the third output)
%
%   NMAX     maximum number of samples to read (or -1 for unlimited [default])
%   NSKIP    number of samples to skip from start of file
%               (or -1 to continue from previous read when FFX is given instead of FILENAME [default])
%
% Output Parameters:
%
%   Y          data matrix of dimension (samples,channels)
%   FS         sample frequency in Hz
%   WRD{*,2}   cell array with word annotations: WRD{*,:}={[t_start t_end],'text'} where times are in seconds
%              only present if 'w' option is given
%   PHN{*,2}   cell array with phoneme annotations: PHN{*,:}={[t_start t_end],'phoneme'} where times are in seconds
%              only present if 't' option is present
%   FFX        Cell array containing
%
%     {1}     filename
%     {2}     header information
%        {1}  first header field name
%        {2}  first header field value
%     {3}     format string (e.g. NIST_1A)
%     {4}(1)  file id
%        (2)  current position in file
%        (3)  dataoff  byte offset in file to start of data
%        (4)  order  byte order (l or b)
%        (5)  nsamp  number of samples
%        (6)  number of channels
%        (7)  nbytes  bytes per data value
%        (8)  bits  number of bits of precision
%        (9)  fs  sample frequency
%        (10) min value
%        (11) max value
%        (12) coding: 0=PCM,1=uLAW + 0=no compression,10=shorten,20=wavpack,30=shortpack
%        (13) file not yet decompressed (NOTE: not populated by the code below, which stores 12 entries)
%     {5}     temporary filename
%
%   If no output parameters are specified, header information will be printed.
%   To decode shorten-encoded files, the program shorten.exe must be in the same directory as this m-file
%
%  Usage Examples:
%
% (a) Draw an annotated spectrogram of a TIMIT file
%           filename='....TIMIT/TEST/DR1/FAKS0/SA1.WAV';
%           [s,fs,wrd,phn]=readsph(filename,'wt');
%           spgrambw(s,fs,'Jwcpta',[],[],[],[],wrd);

%	   Copyright (C) Mike Brookes 1998
%      Version: $Id: readsph.m,v 1.16 2011/09/02 16:25:19 dmb Exp $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

persistent BYTEORDER            % byte order remembered between calls ('l' or 'b')
codes={'sample_count'; 'channel_count';  'sample_n_bytes';'sample_sig_bits'; 'sample_rate'; 'sample_min'; 'sample_max'};
codings={'pcm'; 'ulaw'};
compressions={',embedded-shorten-';',embedded-wavpack-'; ',embedded-shortpack-'};
if isempty(BYTEORDER), BYTEORDER='l'; end
if nargin<1, error('Usage: [y,fs,wrd,phn,ffx]=READSPH(filename,mode,nmax,nskip)'); end
if nargin<2, mode='p';
else mode = [mode(:).' 'p'];    % appending 'p' guarantees that a scaling letter is always found
end
kscale=find((mode>='p') & (mode<='s'));
mno=all(mode~='o');                      % scale to input limits not output limits
sc=mode(kscale(1));                      % first scaling letter wins
if any(mode=='l'), BYTEORDER='l';
elseif any(mode=='b'), BYTEORDER='b';
end
if nargout
    ffx=cell(5,1);
    if ischar(filename)
        if any(mode=='d')
            filename=fullfile(voicebox('dir_data'),filename);
        end
        fid=fopen(filename,'rb',BYTEORDER);
        if fid == -1                    % retry with the default extension
            fn=[filename,'.sph'];
            fid=fopen(fn,'rb',BYTEORDER);
            if fid ~= -1, filename=fn; end
        end
        if fid == -1
            error('Can''t open %s for input',filename);
        end
        ffx{1}=filename;
        fname=filename;
    elseif iscell(filename)
        ffx=filename;                   % continue from the state of a previous call
        fname=ffx{1};
    else
        fid=filename;                   % caller supplied an open file identifier
        fname=fopen(fid);               % recover its path for reopening / annotation files
    end

    if isempty(ffx{4})
        % ---- parse the SPHERE header ----
        fseek(fid,0,-1);
        str=char(fread(fid,16)');
        if length(str)<16 || str(8) ~= 10 || str(16) ~= 10, fclose(fid); error('File does not begin with a SPHERE header'); end
        ffx{3}=str(1:7);
        hlen=str2double(str(9:15));
        hdr=cell(0,2);                  % two columns so hdr(:,1) is valid even when there are no fields
        while 1
            str=fgetl(fid);
            if ~ischar(str)             % fgetl returns -1 at end of file
                fclose(fid);
                error('SPHERE header has no end_head line');
            end
            if isempty(str) || str(1)==';', continue; end      % blank line or comment
            [tok,rest]=strtok(str);
            if isempty(tok), continue; end                      % whitespace-only line
            if strcmp(tok,'end_head'), break; end
            hdr(end+1,1)={tok}; %#ok<AGROW>
            [tok,rest]=strtok(rest);
            if length(tok)<2 || tok(1) ~= '-', fclose(fid); error('Missing ''-'' in SPHERE header'); end
            if tok(2)=='s'              % -sN : string of N characters after a single separator
                nch=str2double(tok(3:end));
                hdr(end,2)={rest(2:min(nch+1,length(rest)))};
            elseif tok(2)=='i'          % -i : integer
                hdr(end,2)={sscanf(rest,'%d',1)};
            else                        % anything else is read as a real number
                hdr(end,2)={sscanf(rest,'%f',1)};
            end
        end
        i=find(strcmp(hdr(:,1),'sample_byte_format'),1);
        if ~isempty(i)
            bord=char('b'+('l'-'b')*(hdr{i,2}(1)=='0'));        % '01' -> little endian, otherwise big endian
            % mode is a vector, so the override test must be reduced with all() before using &&
            if bord ~= BYTEORDER && all(mode~='b') && all(mode~='l')
                BYTEORDER=bord;
                fclose(fid);
                fid=fopen(fname,'rb',BYTEORDER);
            end
        end
        i=find(strcmp(hdr(:,1),'sample_coding'),1);
        icode=0;                % initialize to PCM coding
        if ~isempty(i)
            icode=-1;                   % unknown code
            scode=hdr{i,2};
            nscode=length(scode);
            for jc=1:length(codings)
                lenj=length(codings{jc});
                if strcmp(scode(1:min(nscode,lenj)),codings{jc})
                    if nscode>lenj      % a compression suffix follows the coding name
                        for kc=1:length(compressions)
                            lenk=length(compressions{kc});
                            if strcmp(scode(lenj+1:min(lenj+lenk,nscode)),compressions{kc})
                                icode=10*kc+jc-1;
                                break;
                            end
                        end
                    else
                        icode=jc-1;
                    end
                    break;
                end
            end
        end

        % defaults: 1 channel, 2 bytes, 16 bits, fs=1, min>max marks "range unknown"
        info=[fid; 0; hlen; double(BYTEORDER); 0; 1; 2; 16; 1 ; 1; -1; icode];
        for j=1:7
            i=find(strcmp(hdr(:,1),codes{j}),1);
            if ~isempty(i)
                info(j+4)=hdr{i,2};
            end
        end
        if ~info(5)             % no sample count in header: derive it from the file length
            fseek(fid,0,1);
            info(5)=floor((ftell(fid)-info(3))/(info(6)*info(7)));
        end
        ffx{2}=hdr;
        ffx{4}=info;
    end
    info=ffx{4};
    fid=info(1);                % always defined, including when FFX was passed in
    icode=info(12);             % ditto: the header is not re-parsed on a continued read
    if nargin<4 || isempty(nskip) || nskip<0, nskip=info(2); end

    ksamples=info(5)-nskip;
    if nargin>2 && ~isempty(nmax) && nmax>=0
        ksamples=min(nmax,ksamples);
    end

    if ksamples>0
        if icode>=10 && isempty(ffx{5})
            % ---- compressed file: expand it into a temporary file first ----
            fclose(fid);
            dirt=voicebox('dir_temp');
            [fnp,fnn,fne]=fileparts(fname); %#ok<ASGLU>
            filetemp=fullfile(dirt,[fnn fne]);
            cmdtemp=fullfile(dirt,'shorten.bat');               % batch file needed to convert to short filenames
            cmdfid=fopen(cmdtemp,'wt');
            fprintf(cmdfid,'@"%s" -x -a %%1 "%%~s2" "%%~s3"\n',voicebox('shorten'));
            fclose(cmdfid);
            if exist(filetemp,'file')                          % need to explicitly delete old file since shorten makes read-only
                doscom=['del /f "' filetemp '"'];
                if dos(doscom) % run the program
                    error('Error running DOS command: %s',doscom);
                end
            end
            if floor(icode/10)==1               % shorten
                doscom=['"' cmdtemp '" ' num2str(info(3)) ' "' fname '" "' filetemp '"'];
                if dos(doscom) % run the program
                    error('Error running DOS command: %s',doscom);
                end
            else
                error('unknown compression format');
            end
            ffx{5}=filetemp;
            fid=fopen(filetemp,'r',BYTEORDER);
            if fid<0, error('Cannot open decompressed file %s',filetemp); end
            info(1)=fid;                            % update fid
        end
        info(2)=nskip+ksamples;
        pk=pow2(0.5,8*info(7))*(1+(mno/2-all(mode~='n'))/pow2(0.5,info(8)));  % use modes o and n to determine effective peak
        fseek(fid,info(3)+info(6)*info(7)*nskip,-1);
        nsamples=info(6)*ksamples;
        if info(7)<3
            if info(7)<2
                y=fread(fid,nsamples,'uchar');
                if info(12)==1
                    y=pcmu2lin(y);
                    pk=2.005649;
                else
                    y=y-128;
                end
            else
                y=fread(fid,nsamples,'short');
            end
        else
            if info(7)<4
                % 3-byte samples are read bytewise, so the file byte order must be handled here
                y=fread(fid,3*nsamples,'uchar');
                y=reshape(y,3,nsamples);
                if info(4)=='b', y=y([3 2 1],:); end            % put the least significant byte first
                y=([1 256 65536]*y-pow2(fix(pow2(y(3,:),-7)),24)).';   % sign-extend; column like the other formats
            else
                y=fread(fid,nsamples,'long');
            end
        end
        if sc ~= 'r'
            if sc=='s'
                if info(10)>info(11)    % range not given in header: measure it from this chunk
                    info(10)=min(y);
                    info(11)=max(y);
                end
                sf=1/max(max(abs(info(10:11))),1);
            else
                sf=1/pk;
            end
            y=sf*y;
        end
        if info(6)>1, y = reshape(y,info(6),ksamples).'; end
    else
        y=[];
    end

    if all(mode~='f')
        if fid>=0, fclose(fid); end
        info(1)=-1;
        if ~isempty(ffx{5})
            doscom=['del /f "' ffx{5} '"'];
            if dos(doscom) % run the program
                error('Error running DOS command: %s',doscom);
            end
            ffx{5}=[];
        end
    end
    ffx{4}=info;
    fs=info(9);
    wrd=ffx;        % copy ffx into the other arguments in case 'w' and/or 't' are not specified
    phn=ffx;
    if any(mode=='w')
        wrd=readsph_annot(fname,'wrd',fs);
    end
    if any(mode=='t')
        ph=readsph_annot(fname,'phn',fs);
        if any(mode=='w')
            phn=ph;             % copy into 4th argument
        else
            wrd=ph;             % copy into 3rd argument
        end
    end
else
    % No outputs requested: print a summary of the header.
    % FFX is taken from the 5th output because the 3rd holds annotations when 'w'/'t' is in mode.
    [y1,fs,wrd,phn,ffx]=readsph(filename,mode,0); %#ok<ASGLU>
    info=ffx{4};
    if ~isempty(ffx{1}), fprintf(1,'Filename: %s\n',ffx{1}); end
    fprintf(1,'Sphere file type: %s\n',ffx{3});
    fprintf(1,'Duration = %ss: %d channel * %d samples @ %sHz\n',sprintsi(info(5)/info(9)),info(6),info(5),sprintsi(info(9)));
end

function ann=readsph_annot(fname,ext,fs)
%READSPH_ANNOT  Read a TIMIT-style annotation file that sits next to the sound file FNAME.
%   Each usable line has the form "m n token"; the result has one row per line:
%   {[m n]/fs, 'token'}. Returns an empty cell array if no annotation file can be opened.
ann=cell(0,0);
if ~ischar(fname) || isempty(fname), return; end
[fdir,fbase]=fileparts(fname);
fida=fopen(fullfile(fdir,[fbase '.' ext]),'r');
if fida<0                       % fall back to an upper-case extension (case-sensitive file systems)
    fida=fopen(fullfile(fdir,[fbase '.' upper(ext)]),'r');
end
if fida<0, return; end
while 1
    tline = fgetl(fida);        % read an input line
    if ~ischar(tline)
        break
    end
    [wtim, ntim, ee, nix] = sscanf(tline,'%d%d',2); %#ok<ASGLU>
    if ntim==2                  % ignore lines that do not start with two integers
        ann{end+1,1}=wtim(:)'/fs; %#ok<AGROW>
        ann{end,2}=strtrim(tline(nix:end));
    end
end
fclose(fida);